{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "4ed21505-0445-4d7c-b41a-cf48aaf358be",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "#Set your file path here\n",
    "import os\n",
    "thepath=os.getcwd()+r\"\\evs1999_fordataverse.csv\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "21657912-603d-49f0-bd69-59697b6ca56c",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Unnamed: 0</th>\n",
       "      <th>studyno</th>\n",
       "      <th>iso3n</th>\n",
       "      <th>id_cocas</th>\n",
       "      <th>c_abrv</th>\n",
       "      <th>weight_g</th>\n",
       "      <th>v98</th>\n",
       "      <th>o5</th>\n",
       "      <th>Country</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>3811</td>\n",
       "      <td>40</td>\n",
       "      <td>1.999040e+11</td>\n",
       "      <td>AT</td>\n",
       "      <td>0.856669</td>\n",
       "      <td>3.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>3811</td>\n",
       "      <td>40</td>\n",
       "      <td>1.999040e+11</td>\n",
       "      <td>AT</td>\n",
       "      <td>0.612061</td>\n",
       "      <td>2.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>3811</td>\n",
       "      <td>40</td>\n",
       "      <td>1.999040e+11</td>\n",
       "      <td>AT</td>\n",
       "      <td>1.995994</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3</td>\n",
       "      <td>3811</td>\n",
       "      <td>40</td>\n",
       "      <td>1.999040e+11</td>\n",
       "      <td>AT</td>\n",
       "      <td>0.491389</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4</td>\n",
       "      <td>3811</td>\n",
       "      <td>40</td>\n",
       "      <td>1.999040e+11</td>\n",
       "      <td>AT</td>\n",
       "      <td>1.081707</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   Unnamed: 0  studyno  iso3n      id_cocas c_abrv  weight_g  v98  o5 Country\n",
       "0           0     3811     40  1.999040e+11     AT  0.856669  3.0 NaN     NaN\n",
       "1           1     3811     40  1.999040e+11     AT  0.612061  2.0 NaN     NaN\n",
       "2           2     3811     40  1.999040e+11     AT  1.995994  1.0 NaN     NaN\n",
       "3           3     3811     40  1.999040e+11     AT  0.491389  1.0 NaN     NaN\n",
       "4           4     3811     40  1.999040e+11     AT  1.081707  1.0 NaN     NaN"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "evs=pd.read_csv(thepath)\n",
    "evs.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "06296d3d-ae6c-4990-b8e2-2837fcc611ef",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Unnamed: 0</th>\n",
       "      <th>studyno</th>\n",
       "      <th>iso3n</th>\n",
       "      <th>id_cocas</th>\n",
       "      <th>c_abrv</th>\n",
       "      <th>weight_g</th>\n",
       "      <th>v98</th>\n",
       "      <th>o5</th>\n",
       "      <th>Country</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>3811</td>\n",
       "      <td>40</td>\n",
       "      <td>1.999040e+11</td>\n",
       "      <td>AT</td>\n",
       "      <td>0.856669</td>\n",
       "      <td>3.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>3811</td>\n",
       "      <td>40</td>\n",
       "      <td>1.999040e+11</td>\n",
       "      <td>AT</td>\n",
       "      <td>0.612061</td>\n",
       "      <td>2.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>3811</td>\n",
       "      <td>40</td>\n",
       "      <td>1.999040e+11</td>\n",
       "      <td>AT</td>\n",
       "      <td>1.995994</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3</td>\n",
       "      <td>3811</td>\n",
       "      <td>40</td>\n",
       "      <td>1.999040e+11</td>\n",
       "      <td>AT</td>\n",
       "      <td>0.491389</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4</td>\n",
       "      <td>3811</td>\n",
       "      <td>40</td>\n",
       "      <td>1.999040e+11</td>\n",
       "      <td>AT</td>\n",
       "      <td>1.081707</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   Unnamed: 0  studyno  iso3n      id_cocas c_abrv  weight_g  v98  o5 Country\n",
       "0           0     3811     40  1.999040e+11     AT  0.856669  3.0 NaN     NaN\n",
       "1           1     3811     40  1.999040e+11     AT  0.612061  2.0 NaN     NaN\n",
       "2           2     3811     40  1.999040e+11     AT  1.995994  1.0 NaN     NaN\n",
       "3           3     3811     40  1.999040e+11     AT  0.491389  1.0 NaN     NaN\n",
       "4           4     3811     40  1.999040e+11     AT  1.081707  1.0 NaN     NaN"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "evs.rename(columns={'country':'iso3n'},inplace=True)\n",
    "evs['Country']=evs['c_abrv'].map({'BY':'Belarus',\n",
    "'BG':'Bulgaria',\n",
    "'HR':'Croatia',\n",
    "'RU':'Russia',\n",
    "'IT':'Italy',\n",
    "'SI':'Slovenia',\n",
    "'ES':'Spain',\n",
    "'PL':'Poland',\n",
    "'TR':'Turkey',\n",
    "'UA':'Ukraine'})\n",
    "evs.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "11733bb5-d877-4a43-9db9-4b99b42b5c59",
   "metadata": {},
   "outputs": [],
   "source": [
    "def codeevsagree(df,avar):\n",
    "    conditions = [\n",
    "      (df[avar]==1)\n",
    "    , (df[avar]==2)\n",
    "    , (df[avar]==3)\n",
    "]\n",
    "    return(conditions)\n",
    "\n",
    "choices3  = [\n",
    "    \"agree\"\n",
    "    , \"disagree\"\n",
    "    , \"neutral\"\n",
    "]\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "190c520d-9945-449a-b8f4-82e43f03c4da",
   "metadata": {},
   "outputs": [],
   "source": [
    "# When jobs are scarce, employers should give priority to British (Countries other than UK: please substitute your nationality!) people over immigrants\n",
    "# -5 other missing\n",
    "# -4 question not asked\n",
    "# -3 not applicable\n",
    "# -2 no answer\n",
    "# -1 don't know\n",
    "# 1 agree\n",
    "# 2 disagree\n",
    "# 3 neither\n",
    "evs[\"job_ntnl\"] = np.select(codeevsagree(evs,'v98'), choices3, default=None)\n",
    "# When jobs are scarce, employers should give priority to local people over people from other parts of the country\n",
    "# -5 other missing\n",
    "# -4 question not asked\n",
    "# -3 not applicable\n",
    "# -2 no answer\n",
    "# -1 don't know\n",
    "# 1 agree\n",
    "# 2 disagree\n",
    "# 3 neither\n",
    "evs[\"job_rgnl\"] = np.select(codeevsagree(evs,'o5'), choices3, default=None)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "9aa2ce5b-debf-4b44-81e1-40da3123c551",
   "metadata": {},
   "outputs": [],
   "source": [
    "conditions = [\n",
    "      (evs['job_ntnl']=='agree') & (evs['job_rgnl']=='agree')\n",
    "    , (evs['job_ntnl']!='agree') & (evs['job_rgnl']=='agree')\n",
    "    ,  (evs['job_ntnl']=='agree') & (evs['job_rgnl']!='agree') & (pd.notna(evs['job_rgnl']))\n",
    "    ,  (evs['job_ntnl']!='agree') & (evs['job_rgnl']!='agree') & (pd.notna(evs['job_rgnl']))\n",
    "]\n",
    "choices = [\"Both\",\"National only\",\"Sub-national only\",\"Neither\"]\n",
    "\n",
    "evs['whichcombo']=np.select(conditions,choices,default=None)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "b386da7a-9ab3-46de-bc5e-bfe3082ef3ba",
   "metadata": {},
   "outputs": [],
   "source": [
    "def evs_table(avar,df):\n",
    "    use=df.loc[pd.notna(df[avar])].copy()\n",
    "        \n",
    "    use['newsum']=use.groupby(['Country'])['weight_g'].transform('sum')\n",
    "    use['percent_in_category']=use['weight_g']/use['newsum']*100\n",
    "    \n",
    "    toreturn=use.groupby(['Country',avar], as_index=False)['percent_in_category'].agg(['sum']).reset_index()\n",
    "    toreturn=toreturn.copy().pivot(index=['Country'], columns=avar, values='sum').reset_index()\n",
    "\n",
    "    return(toreturn)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "c4547439-0b9e-4335-b23e-c34a5f789ae1",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th>whichcombo</th>\n",
       "      <th>Country</th>\n",
       "      <th>alllocal</th>\n",
       "      <th>allntnl</th>\n",
       "      <th>Both</th>\n",
       "      <th>National only</th>\n",
       "      <th>Sub-national only</th>\n",
       "      <th>Neither</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Belarus</td>\n",
       "      <td>83.867521</td>\n",
       "      <td>60.363248</td>\n",
       "      <td>58.226496</td>\n",
       "      <td>2.136752</td>\n",
       "      <td>25.641026</td>\n",
       "      <td>13.995726</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Bulgaria</td>\n",
       "      <td>86.945339</td>\n",
       "      <td>76.087751</td>\n",
       "      <td>69.154968</td>\n",
       "      <td>6.932783</td>\n",
       "      <td>17.790371</td>\n",
       "      <td>6.121879</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Croatia</td>\n",
       "      <td>89.075492</td>\n",
       "      <td>78.811778</td>\n",
       "      <td>74.750317</td>\n",
       "      <td>4.061460</td>\n",
       "      <td>14.325174</td>\n",
       "      <td>6.863048</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Italy</td>\n",
       "      <td>60.929520</td>\n",
       "      <td>41.062308</td>\n",
       "      <td>36.312564</td>\n",
       "      <td>4.749745</td>\n",
       "      <td>24.616956</td>\n",
       "      <td>34.320735</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Poland</td>\n",
       "      <td>91.004591</td>\n",
       "      <td>69.481241</td>\n",
       "      <td>66.238368</td>\n",
       "      <td>3.242874</td>\n",
       "      <td>24.766224</td>\n",
       "      <td>5.752535</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>Russia</td>\n",
       "      <td>70.823693</td>\n",
       "      <td>69.374168</td>\n",
       "      <td>59.216797</td>\n",
       "      <td>10.157371</td>\n",
       "      <td>11.606896</td>\n",
       "      <td>19.018936</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>Slovenia</td>\n",
       "      <td>74.899194</td>\n",
       "      <td>55.241935</td>\n",
       "      <td>48.487903</td>\n",
       "      <td>6.754032</td>\n",
       "      <td>26.411290</td>\n",
       "      <td>18.346774</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>Spain</td>\n",
       "      <td>64.211438</td>\n",
       "      <td>50.519931</td>\n",
       "      <td>45.753899</td>\n",
       "      <td>4.766031</td>\n",
       "      <td>18.457539</td>\n",
       "      <td>31.022530</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>Turkey</td>\n",
       "      <td>66.101695</td>\n",
       "      <td>67.627119</td>\n",
       "      <td>53.559322</td>\n",
       "      <td>14.067797</td>\n",
       "      <td>12.542373</td>\n",
       "      <td>19.830508</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>Ukraine</td>\n",
       "      <td>67.898283</td>\n",
       "      <td>68.822346</td>\n",
       "      <td>56.905359</td>\n",
       "      <td>11.916986</td>\n",
       "      <td>10.992923</td>\n",
       "      <td>20.184731</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "whichcombo   Country   alllocal    allntnl       Both  National only  \\\n",
       "0            Belarus  83.867521  60.363248  58.226496       2.136752   \n",
       "1           Bulgaria  86.945339  76.087751  69.154968       6.932783   \n",
       "2            Croatia  89.075492  78.811778  74.750317       4.061460   \n",
       "3              Italy  60.929520  41.062308  36.312564       4.749745   \n",
       "4             Poland  91.004591  69.481241  66.238368       3.242874   \n",
       "5             Russia  70.823693  69.374168  59.216797      10.157371   \n",
       "6           Slovenia  74.899194  55.241935  48.487903       6.754032   \n",
       "7              Spain  64.211438  50.519931  45.753899       4.766031   \n",
       "8             Turkey  66.101695  67.627119  53.559322      14.067797   \n",
       "9            Ukraine  67.898283  68.822346  56.905359      11.916986   \n",
       "\n",
       "whichcombo  Sub-national only    Neither  \n",
       "0                   25.641026  13.995726  \n",
       "1                   17.790371   6.121879  \n",
       "2                   14.325174   6.863048  \n",
       "3                   24.616956  34.320735  \n",
       "4                   24.766224   5.752535  \n",
       "5                   11.606896  19.018936  \n",
       "6                   26.411290  18.346774  \n",
       "7                   18.457539  31.022530  \n",
       "8                   12.542373  19.830508  \n",
       "9                   10.992923  20.184731  "
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "theresults=evs_table('whichcombo',evs)\n",
    "theresults['alllocal']=theresults['Both']+theresults['Sub-national only']\n",
    "theresults['allntnl']=theresults['Both']+theresults['National only']\n",
    "\n",
    "theresults[['Country','alllocal',\"allntnl\",\"Both\",\"National only\",\"Sub-national only\",\"Neither\"]]"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.13.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
